This is the latest report on retracted publications in the PubMed database. It was generated on July 30 2015. This report replaces the application PMRetract, formerly hosted at Heroku.
Each section contains two charts. The first (blue) contains data about retracted publications. The second (orange) contains data about retraction notices. The PubMed search terms for these are, respectively:
This chart shows the number of retracted publications per year. PubMed uses a variety of different dates; the year used here is the date that the record was created (CRDT).
Clicking on a year will open a new window at PubMed showing the retracted articles for that year.
This chart shows the cumulative sum of retracted publications per year. The year used here is the date that the record was created (CRDT).
Clicking on a year will open a new window at PubMed showing the retracted articles from 1959 up to and including that year.
This chart shows the rate of retracted publications per year, as retractions per 100 000 publications. The year used here is the date that the record was created (CRDT).
Clicking on a year will open a new window at PubMed showing the retracted articles for that year.
This chart shows the top 20 journals by number of retracted articles. See section 5 for journals ranked by retractions relative to total articles.
Clicking on a journal name will open a new window at PubMed showing the retracted articles from that journal.
This chart ranks the top 20 retractions by journal. For each journal, retractions (or retraction notices) per 100 000 publications from that journal are shown.
Clicking on a journal name will open a new window at PubMed showing the retracted articles from that journal.
This code loads required libraries and pre-saved data.
library(rCharts)
library(rentrez)
library(XML)
setwd("../../data")
doc.retd <- xmlTreeParse("retracted.xml", useInternalNodes = TRUE)
doc.retOf <- xmlTreeParse("retractionOf.xml", useInternalNodes = TRUE)
years.total <- read.csv("years.csv")
jour.retd <- read.csv("journals_retracted.csv")
jour.retOf <- read.csv("journals_retractionOf.csv")
This code generates the timeline chart.
plotTimeline <- function(d, term) {
dates <- xpathSApply(d, "//PubmedData/History/PubMedPubDate[@PubStatus='entrez']/Year",
xmlValue)
df1 <- as.data.frame(table(dates), stringsAsFactors = FALSE)
hc <- Highcharts$new()
hc$title(text = "Retracted publications by year of Entrez record creation")
hc$series(data = df1$Freq, type = "column")
hc$xAxis(categories = df1$dates, labels = list(rotation = 90, formatter = paste("#! function() { return '<a href=\"http://www.pubmed.org/?term=%22",
term, "%22[PTYP] AND ' + escape(this.value) + '[CRDT]\" target=\"_blank\">' + this.value + '</a>'; } !#",
sep = ""), useHTML = "true"), title = list(text = "year"))
hc$yAxis(title = list(text = "retracted publications"))
hc$legend(enabled = FALSE)
hc$tooltip(pointFormat = "{point.y} records")
return(hc)
}
This code generates the cumulative timeline chart.
plotCumSumTimeline <- function(d, term) {
dates <- xpathSApply(d, "//PubmedData/History/PubMedPubDate[@PubStatus='entrez']/Year",
xmlValue)
df1 <- as.data.frame(table(dates), stringsAsFactors = FALSE)
hc <- Highcharts$new()
hc$title(text = "Cumulative sum of retracted publications by year of Entrez record creation")
hc$series(data = cumsum(df1$Freq), type = "column")
hc$xAxis(categories = df1$dates, labels = list(rotation = 90, formatter = paste("#! function() { return '<a href=\"http://www.pubmed.org/?term=%22",
term, "%22[PTYP] AND 1959:' + escape(this.value) + '[CRDT]\" target=\"_blank\">' + this.value + '</a>'; } !#",
sep = ""), useHTML = "true"), title = list(text = "year"))
hc$yAxis(title = list(text = "sum of retracted publications"))
hc$legend(enabled = FALSE)
hc$tooltip(pointFormat = "{point.y} records since 1959")
return(hc)
}
This code generates the retraction rate by year chart.
plotByYear <- function(d, total, term) {
dates <- xpathSApply(d, "//PubmedData/History/PubMedPubDate[@PubStatus='entrez']/Year",
xmlValue)
years <- as.numeric(dates)
ydf <- data.frame(year = min(years):max(years), total = NA, retracted = NA)
years.cnt <- as.data.frame(table(years), stringsAsFactors = FALSE)
m <- match(ydf$year, years.cnt$years)
ydf$retracted <- years.cnt[m, "Freq"]
ydf$retracted <- ifelse(is.na(ydf$retracted), 0, ydf$retracted)
m <- match(ydf$year, total$year)
ydf$total <- total[m, "total"]
hc <- Highcharts$new()
hc$title(text = "Retracted publications per 100 000 publications by year of Entrez record creation")
hc$series(data = as.numeric(sprintf("%.3f", (1e+05/ydf$total) * ydf$retracted)),
type = "column", events = list(click = "#! function() {window.open(this.options.url)} !#"))
hc$xAxis(categories = ydf$year, labels = list(rotation = 90, formatter = paste("#! function() { return '<a href=\"http://www.pubmed.org/?term=%22",
term, "%22[PTYP] AND ' + escape(this.value) + '[CRDT]\" target=\"_blank\">' + this.value + '</a>'; } !#",
sep = ""), useHTML = "true"), title = list(text = "year"))
hc$yAxis(title = list(text = "retracted publications per 100 000 publications"))
hc$legend(enabled = FALSE)
hc$tooltip(pointFormat = "{point.y} retracted records per 100 000 publication records")
return(hc)
}
This code generates the retractions by journal chart (absolute numbers).
plotByJournal <- function(d, term) {
journals <- xpathSApply(d, "//MedlineCitation/Article/Journal/ISOAbbreviation",
xmlValue)
journals.cnt <- as.data.frame(table(journals), stringsAsFactors = FALSE)
colnames(journals.cnt) <- c("journal", "count")
j20 <- head(journals.cnt[order(journals.cnt$count, decreasing = TRUE), ],
20)
hc <- Highcharts$new()
hc$chart(marginLeft = 220)
hc$series(data = j20$count, type = "bar")
hc$xAxis(categories = j20$journal, labels = list(formatter = paste("#! function() { return '<a href=\"http://www.pubmed.org/?term=%22",
term, "%22[PTYP] AND %22' + escape(this.value) + '%22[JOUR]\" target=\"_blank\">' + this.value + '</a>'; } !#",
sep = ""), useHTML = "true"))
hc$yAxis(title = list(text = "retracted publications"))
hc$legend(enabled = FALSE)
hc$tooltip(pointFormat = "{point.y} records")
return(hc)
}
This code generates the retractions by journal chart (relative to total publications for each journal).
plotByJournal <- function(d, term) {
d$idx <- as.numeric(sprintf("%.3f", (1e+05/d$total) * d$count))
d <- head(d[order(d$idx, decreasing = TRUE), ], 20)
hc <- Highcharts$new()
hc$chart(marginLeft = 220)
hc$series(data = d$idx, type = "bar")
hc$xAxis(categories = d$journal, labels = list(formatter = paste("#! function() { return '<a href=\"http://www.pubmed.org/?term=%22",
term, "%22[PTYP] AND %22' + escape(this.value) + '%22[JOUR]\" target=\"_blank\">' + this.value + '</a>'; } !#",
sep = ""), useHTML = "true"))
hc$yAxis(title = list(text = "retracted publications / 100 000 publications"))
hc$legend(enabled = FALSE)
hc$tooltip(pointFormat = "{point.y} retractions / 100 000 publications")
return(hc)
}